Nicula Florin (Gr 406), Orosanu Claudiu (Gr 405)
09 January 2019
Exemplu: retele de socializare
library(tidyverse)
library(igraph)
library(statnet)load("union_edges.RData")
load("union_characters.RData")head(union_characters)## name male culture house popularity house2 color
## 1 Alys Arryn 0 <NA> House Arryn 0.08026756 <NA> <NA>
## 2 Elys Waynwood 0 <NA> House Waynwood 0.07023411 <NA> <NA>
## 3 Jasper Arryn 1 <NA> House Arryn 0.04347826 <NA> <NA>
## 4 Jeyne Royce 0 <NA> House Royce 0.00000000 <NA> <NA>
## 5 Jon Arryn 1 Valemen House Arryn 0.83612040 <NA> <NA>
## 6 Lysa Arryn 0 <NA> House Tully 0.00000000 House Tully #F781BF
## shape
## 1 circle
## 2 circle
## 3 square
## 4 circle
## 5 square
## 6 circle
head(union_edges)## source target type color lty
## 1 Lysa Arryn Robert Arryn mother #7570B3 solid
## 2 Jasper Arryn Alys Arryn father #1B9E77 solid
## 3 Jasper Arryn Jon Arryn father #1B9E77 solid
## 4 Jon Arryn Robert Arryn father #1B9E77 solid
## 110 Cersei Lannister Tommen Baratheon mother #7570B3 solid
## 210 Cersei Lannister Joffrey Baratheon mother #7570B3 solid
Muchiile reprezinta:
Muchiile punctate sunt relatii care exista doar in serialul TV, nu si in carti.
union_graph <- graph_from_data_frame(union_edges, directed = TRUE, vertices = union_characters)
union_graph## IGRAPH 845f119 DN-- 208 404 --
## + attr: name (v/c), male (v/n), culture (v/c), house (v/c),
## | popularity (v/n), house2 (v/c), color (v/c), shape (v/c), type
## | (e/c), color (e/c), lty (e/c)
## + edges from 845f119 (vertex names):
## [1] Lysa Arryn ->Robert Arryn
## [2] Jasper Arryn ->Alys Arryn
## [3] Jasper Arryn ->Jon Arryn
## [4] Jon Arryn ->Robert Arryn
## [5] Cersei Lannister ->Tommen Baratheon
## [6] Cersei Lannister ->Joffrey Baratheon
## + ... omitted several edges
color_vertices <- union_characters %>%
group_by(house, color) %>%
summarise(n = n()) %>%
filter(!is.na(color))
head(color_vertices)## # A tibble: 6 x 3
## # Groups: house [6]
## house color n
## <chr> <I(chr)> <int>
## 1 House Baratheon #E41A1C 10
## 2 House Frey #377EB8 20
## 3 House Greyjoy #4DAF4A 14
## 4 House Lannister #984EA3 26
## 5 House Martell #FF7F00 13
## 6 House Stark #FFFF33 26
colors_edges <- union_edges %>%
group_by(type, color) %>%
summarise(n = n()) %>%
filter(!is.na(color))
head(colors_edges)## # A tibble: 4 x 3
## # Groups: type [4]
## type color n
## <chr> <I(chr)> <int>
## 1 father #1B9E77 129
## 2 father/mother #D95F02 1
## 3 mother #7570B3 118
## 4 spouse #E7298A 156
Functiile de mai sus sunt din pachetul dplyr si permit manipularea datelor (grupari, agregari, filtrari)
Folosim un layout de tip Fruchterman-Reingold.
layout <- layout_with_fr(union_graph)plot(union_graph,
layout = layout,
vertex.label = gsub(" ", "\n", V(union_graph)$name),
vertex.shape = V(union_graph)$shape,
vertex.color = V(union_graph)$color,
vertex.size = (V(union_graph)$popularity + 0.5) * 5,
vertex.frame.color = "gray",
vertex.label.color = "black",
vertex.label.cex = 0.8,
edge.arrow.size = 0.5,
edge.color = E(union_graph)$color,
edge.lty = E(union_graph)$lty)
legend("topleft", legend = c(NA, "Node color:", as.character(color_vertices$house), NA, "Edge color:", as.character(colors_edges$type)), pch = 19,
col = c(NA, NA, color_vertices$color, NA, NA, colors_edges$color), pt.cex = 5, cex = 2, bty = "n", ncol = 1,
title = "")
legend("topleft", legend = "", cex = 4, bty = "n", ncol = 1,
title = "Game of Thrones Family Ties")Centralitatea unui nod poate fi calculata in functie de:
Centralitatea calculata in functie de gradul nodului depinde de cate muchii intra/ies in/din nodul respectiv. Practic, cu cat este mai mare numarul de muchii adiacente unui nod, cu atat este mai mare centralitatea nodului.
union_graph_undir <- as.undirected(union_graph, mode = "collapse")union_graph_undir_degree <- igraph::degree(union_graph_undir, mode = "total")
#standardized by number of nodes
union_graph_undir_degree_std <- union_graph_undir_degree / (vcount(union_graph_undir) - 1)node_degree <- data.frame(degree = union_graph_undir_degree,
degree_std = union_graph_undir_degree_std) %>%
tibble::rownames_to_column()
union_characters <- left_join(union_characters, node_degree, by = c("name" = "rowname"))
node_degree %>%
arrange(-degree) %>%
.[1:10, ]## rowname degree degree_std
## 1 Quellon Greyjoy 12 0.05797101
## 2 Walder Frey 10 0.04830918
## 3 Oberyn Martell 10 0.04830918
## 4 Eddard Stark 9 0.04347826
## 5 Catelyn Stark 8 0.03864734
## 6 Emmon Frey 7 0.03381643
## 7 Genna Lannister 7 0.03381643
## 8 Merrett Frey 7 0.03381643
## 9 Balon Greyjoy 7 0.03381643
## 10 Jason Lannister 7 0.03381643
Dupa acest criteriu, Quellon Greyjoy ar fi cel mai important personaj, prin prisma faptului ca a avut multi copii (i-au placut femeile). Totusi, majoritatea fanilor GOT nu l-ar considera prea important (probabil ar intreba “Cine mai e si asta? N-am auzit de el”).
Apropierea - distanta unui nod fata de toate celelalte noduri. Un nod cu apropierea mare este mai central si poate imparti informatie cu multe noduri.
closeness <- igraph::closeness(union_graph_undir, mode = "total")
#standardized by number of nodes
closeness_std <- closeness / (vcount(union_graph_undir) - 1)node_closeness <- data.frame(closeness = closeness,
closeness_std = closeness_std) %>%
tibble::rownames_to_column()
union_characters <- left_join(union_characters, node_closeness, by = c("name" = "rowname"))
node_closeness %>%
arrange(-closeness) %>%
.[1:10, ]## rowname closeness closeness_std
## 1 Sansa Stark 0.0002013288 9.726028e-07
## 2 Tyrion Lannister 0.0002012882 9.724070e-07
## 3 Tywin Lannister 0.0002011668 9.718201e-07
## 4 Joanna Lannister 0.0002005616 9.688965e-07
## 5 Eddard Stark 0.0002002804 9.675381e-07
## 6 Catelyn Stark 0.0001986492 9.596579e-07
## 7 Cersei Lannister 0.0001984915 9.588960e-07
## 8 Jaime Lannister 0.0001975894 9.545382e-07
## 9 Jeyne Marbrand 0.0001966568 9.500330e-07
## 10 Tytos Lannister 0.0001966568 9.500330e-07
Putem observa ca, calculul centralitatii in functie de apropiere ilustreaza mult mai bine importanta caracterelor. Caracterele cele mai importante sunt, in general, cele care apar in cele mai multe evenimente din poveste.
Betweenness - cate drumuri de cost minim trec prin nodul/muchia respectiv/a?
Caracterele cheie, care fac legatura dintre case, au acest coeficient marit.
betweenness <- igraph::betweenness(union_graph_undir, directed = FALSE)
# standardize by number of node pairs
betweenness_std <- betweenness / ((vcount(union_graph_undir) - 1) * (vcount(union_graph_undir) - 2) / 2)
node_betweenness <- data.frame(betweenness = betweenness,
betweenness_std = betweenness_std) %>%
tibble::rownames_to_column()
union_characters <- left_join(union_characters, node_betweenness, by = c("name" = "rowname"))
node_betweenness %>%
arrange(-betweenness) %>%
.[1:10, ]## rowname betweenness betweenness_std
## 1 Eddard Stark 6926.864 0.3248846
## 2 Sansa Stark 6165.667 0.2891828
## 3 Tyrion Lannister 5617.482 0.2634718
## 4 Tywin Lannister 5070.395 0.2378123
## 5 Joanna Lannister 4737.524 0.2221999
## 6 Rhaegar Targaryen 4301.583 0.2017533
## 7 Margaery Tyrell 4016.417 0.1883784
## 8 Jon Snow 3558.884 0.1669192
## 9 Mace Tyrell 3392.500 0.1591154
## 10 Jason Lannister 3068.500 0.1439191
edge_betweenness <- igraph::edge_betweenness(union_graph_undir, directed = FALSE)
data.frame(edge = attr(E(union_graph_undir), "vnames"),
betweenness = edge_betweenness) %>%
tibble::rownames_to_column() %>%
arrange(-betweenness) %>%
.[1:10, ]## rowname edge betweenness
## 1 160 Sansa Stark|Tyrion Lannister 5604.149
## 2 207 Sansa Stark|Eddard Stark 4709.852
## 3 212 Rhaegar Targaryen|Jon Snow 3560.083
## 4 296 Margaery Tyrell|Mace Tyrell 3465.000
## 5 213 Eddard Stark|Jon Snow 3163.048
## 6 131 Jason Lannister|Joanna Lannister 3089.500
## 7 159 Joanna Lannister|Tyrion Lannister 2983.591
## 8 171 Tyrion Lannister|Tywin Lannister 2647.224
## 9 192 Elia Martell|Rhaegar Targaryen 2580.000
## 10 300 Luthor Tyrell|Mace Tyrell 2565.000
Ned Stark se claseaza cel mai bine aici, ceea ce este plauzibil, deoarece el si copiii lui (mai ales Sansa) conecteaza cele mai importante familii.
plot(union_graph_undir,
layout = layout,
vertex.label = gsub(" ", "\n", V(union_graph_undir)$name),
vertex.shape = V(union_graph_undir)$shape,
vertex.color = V(union_graph_undir)$color,
vertex.size = betweenness * 0.001,
vertex.frame.color = "gray",
vertex.label.color = "black",
vertex.label.cex = 0.8,
edge.width = edge_betweenness * 0.01,
edge.arrow.size = 0.5,
edge.color = E(union_graph_undir)$color,
edge.lty = E(union_graph_undir)$lty)
legend("topleft", legend = c("Node color:", as.character(color_vertices$house), NA, "Edge color:", as.character(colors_edges$type)), pch = 19,
col = c(NA, color_vertices$color, NA, NA, colors_edges$color), pt.cex = 5, cex = 2, bty = "n", ncol = 1)Diametrul retelei - lungimea celui mai lung drum de cost minim
diameter(union_graph_undir, directed = FALSE)## [1] 21
union_graph_undir_diameter <- union_graph_undir
node_diameter <- get.diameter(union_graph_undir_diameter, directed = FALSE)
V(union_graph_undir_diameter)$color <- scales::alpha(V(union_graph_undir_diameter)$color, alpha = 0.5)
V(union_graph_undir_diameter)$size <- 2
V(union_graph_undir_diameter)[node_diameter]$color <- "red"
V(union_graph_undir_diameter)[node_diameter]$size <- 5
E(union_graph_undir_diameter)$color <- "grey"
E(union_graph_undir_diameter)$width <- 1
E(union_graph_undir_diameter, path = node_diameter)$color <- "red"
E(union_graph_undir_diameter, path = node_diameter)$width <- 5
plot(union_graph_undir_diameter,
layout = layout,
vertex.label = gsub(" ", "\n", V(union_graph_undir_diameter)$name),
vertex.shape = V(union_graph_undir_diameter)$shape,
vertex.frame.color = "gray",
vertex.label.color = "black",
vertex.label.cex = 0.8,
edge.arrow.size = 0.5,
edge.lty = E(union_graph_undir_diameter)$lty)
legend("topleft", legend = c("Node color:", as.character(color_vertices$house), NA, "Edge color:", as.character(colors_edges$type)), pch = 19,
col = c(NA, color_vertices$color, NA, NA, colors_edges$color), pt.cex = 5, cex = 2, bty = "n", ncol = 1)Tranzitivitatea masoara probabilitatea ca muchiile adiacente a nodurilor sunt conectate. Este de asemenea numita coeficientul de clusterizare.
transitivity(union_graph_undir, type = "global")## [1] 0.2850679
transitivity <- data.frame(name = V(union_graph_undir)$name,
transitivity = transitivity(union_graph_undir, type = "local")) %>%
mutate(name = as.character(name))
union_characters <- left_join(union_characters, transitivity, by = "name")
transitivity %>%
arrange(-transitivity) %>%
.[1:10, ]## name transitivity
## 1 Robert Arryn 1
## 2 Ormund Baratheon 1
## 3 Selyse Florent 1
## 4 Shireen Baratheon 1
## 5 Amarei Crakehall 1
## 6 Marissa Frey 1
## 7 Olyvar Frey 1
## 8 Perra Royce 1
## 9 Perwyn Frey 1
## 10 Tion Frey 1
Deoarece graficul nostru este un arbore genealogic, caracterele cu tranzitivitatea de 1 formeaza un triunghi cu parintii sau copii.
Este un algoritm prin care se stabilesc cele mai importante noduri din retea prin prisma faptului ca acestea contin multe muchii. Acset algoritm a fost dezvoltat prima data de catre Google.
page_rank <- page.rank(union_graph_undir, directed = FALSE)
page_rank_centrality <- data.frame(name = names(page_rank$vector),
page_rank = page_rank$vector) %>%
mutate(name = as.character(name))
union_characters <- left_join(union_characters, page_rank_centrality, by = "name")
page_rank_centrality %>%
arrange(-page_rank) %>%
.[1:10, ]## name page_rank
## 1 Oberyn Martell 0.018402407
## 2 Quellon Greyjoy 0.016128129
## 3 Walder Frey 0.012956029
## 4 Eddard Stark 0.011725019
## 5 Cregan Stark 0.010983561
## 6 Catelyn Stark 0.010555473
## 7 Lyarra Stark 0.009876629
## 8 Aegon V Targaryen 0.009688458
## 9 Balon Greyjoy 0.009647049
## 10 Jon Arryn 0.009623742
In cazul nostru Oberyn Martell, Quellon Greyjoy si Walder Frey sunt cele mai importante noduri pentru ca au numarul cel mai mare de sotii, copii si nepoti.
adjacency <- as.matrix(as_adjacency_matrix(union_graph_undir))Folosind matricea de adiacenta putem calcula eigenvalues(scalari ai vectoriilor) si eigenvectors(vectorii caracteristici) matricei.
#degree diagonal matrix
degree_diag <- diag(1 / igraph::degree(union_graph_undir))
# PageRank matrix
pagerank <- adjacency %*% degree_diag
eigenvalues <- eigen(pagerank)Vectorul (eigenvector) cu cel mai mare scalar (eigenvalue) ofera o mai mare importanta nodului de care apartine deaorece este conectat la alte noduri cu mule muchii.
eigenvector <- data.frame(name = rownames(pagerank),
eigenvector = as.numeric(eigenvalues$vectors[, which.max(eigenvalues$values)]))
union_characters <- left_join(union_characters, eigenvector, by = "name")
eigenvector %>%
arrange(eigenvector) %>%
.[1:10, ]## name eigenvector
## 1 Quellon Greyjoy -0.6625628
## 2 Balon Greyjoy -0.3864950
## 3 Lady of House Sunderly -0.3312814
## 4 Alannys Harlaw -0.2760678
## 5 Lady of House Stonetree -0.2208543
## 6 Asha (Yara) Greyjoy -0.1656407
## 7 Robin Greyjoy -0.1104271
## 8 Euron Greyjoy -0.1104271
## 9 Urrigon Greyjoy -0.1104271
## 10 Victarion Greyjoy -0.1104271
Familia care are cele mai multe conexiuni este Greyjoy (eigenvalue).
Tywin si partea importanta a familiei Lannister este cel mai importanta vector (eigenvector).
eigen_centrality <- igraph::eigen_centrality(union_graph_undir, directed = FALSE)
eigen_centrality <- data.frame(name = names(eigen_centrality$vector),
eigen_centrality = eigen_centrality$vector) %>%
mutate(name = as.character(name))
union_characters <- left_join(union_characters, eigen_centrality, eigenvector, by = "name")
eigen_centrality %>%
arrange(-eigen_centrality) %>%
.[1:10, ]## name eigen_centrality
## 1 Tywin Lannister 1.0000000
## 2 Cersei Lannister 0.9168980
## 3 Joanna Lannister 0.8358122
## 4 Tytos Lannister 0.8190076
## 5 Jeyne Marbrand 0.8190076
## 6 Genna Lannister 0.7788376
## 7 Jaime Lannister 0.7642870
## 8 Robert Baratheon 0.7087042
## 9 Emmon Frey 0.6538709
## 10 Walder Frey 0.6516021
Putem compara acum toata informatia stocata la nivelul nodurilor pentru a decide care este cel mai important personaj in Game of Thrones.
Vom afisa toate personajele caselor importante
union_characters %>%
filter(!is.na(house2)) %>%
dplyr::select(-contains("_std")) %>%
gather(x, y, degree:eigen_centrality) %>%
ggplot(aes(x = name, y = y, color = house2)) +
geom_point(size = 3) +
facet_grid(x ~ house2, scales = "free") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))Vom observa ca familiile Stark si Lannister sunt cele mai importante conexiuni ale familiilor in GoT.
Putem grupa nodurile numarul de clicuri care se afla intre mai multe noduri. De asemenea putem calcula numarul de cai sau cicluri de orice lungime. Pentru muchii putem obtine o suma a tuturor cailor si ciclurilor pana la lungimea maxima. Iar pentru noduri putem obtine numarul de cai sau cicluri de care aprtine acestea.
node_kpath <- kpath.census(adjacency, maxlen = 5, mode = "graph", tabulate.by.vertex = TRUE, dyadic.tabulation = "sum")
edge_kpath <- kpath.census(adjacency, maxlen = 5, mode = "graph", tabulate.by.vertex = FALSE)
edge_kpath## $path.count
## 1 2 3 4 5
## 326 1105 2973 7183 17026
gplot(node_kpath$paths.bydyad,
label.cex = 0.5,
vertex.cex = 0.75,
displaylabels = TRUE,
edge.col = "grey")node_kcycle <- kcycle.census(adjacency, maxlen = 8, mode = "graph", tabulate.by.vertex = TRUE, cycle.comembership = "sum")
edge_kcycle <- kcycle.census(adjacency, maxlen = 8, mode = "graph", tabulate.by.vertex = FALSE)
edge_kcycle## $cycle.count
## 2 3 4 5 6 7 8
## 0 105 136 27 57 58 86
node_kcycle_reduced <- node_kcycle$cycle.comemb
node_kcycle_reduced <- node_kcycle_reduced[which(rowSums(node_kcycle_reduced) > 0), which(colSums(node_kcycle_reduced) > 0)]
gplot(node_kcycle_reduced,
label.cex = 0.5,
vertex.cex = 0.75,
displaylabels = TRUE,
edge.col = "grey")Pentru a stabili numarul maxim de noduri adiacente vom folosi “clique.census()”
node_clique <- clique.census(adjacency, mode = "graph", tabulate.by.vertex = TRUE, clique.comembership = "sum")
edge_clique <- clique.census(adjacency, mode = "graph", tabulate.by.vertex = FALSE, clique.comembership = "sum")
edge_clique$clique.count## 1 2 3
## 0 74 105
node_clique_reduced <- node_clique$clique.comemb
node_clique_reduced <- node_clique_reduced[which(rowSums(node_clique_reduced) > 0), which(colSums(node_clique_reduced) > 0)]
gplot(node_clique_reduced,
label.cex = 0.5,
vertex.cex = 0.75,
displaylabels = TRUE,
edge.col = "grey")De asemenea putem sa selectam grupuri din reteaua noastra prin clusterizarea nodul grupurilor in functie de muchiile lor
ceb <- cluster_edge_betweenness(union_graph_undir)
modularity(ceb)## [1] 0.8359884
plot(ceb,
union_graph_undir,
layout = layout,
vertex.label = gsub(" ", "\n", V(union_graph_undir)$name),
vertex.shape = V(union_graph_undir)$shape,
vertex.size = (V(union_graph_undir)$popularity + 0.5) * 5,
vertex.frame.color = "gray",
vertex.label.color = "black",
vertex.label.cex = 0.8)Sau folosind “propagating labels” pentru a gasi comunitati
clp <- cluster_label_prop(union_graph_undir)
plot(clp,
union_graph_undir,
layout = layout,
vertex.label = gsub(" ", "\n", V(union_graph_undir)$name),
vertex.shape = V(union_graph_undir)$shape,
vertex.size = (V(union_graph_undir)$popularity + 0.5) * 5,
vertex.frame.color = "gray",
vertex.label.color = "black",
vertex.label.cex = 0.8)Putem afla matricea de adiacenta folosind si functii precum GenInd() din libraria NetIndices. Aceasta functie returneaza proprietati precum: numarul de compartimente (N), inputul total a sistemului (T..), fluxul total a sistemului (TST), numarul intern de legaturi (Lint), numarul total de legaturi (Ltot), densitatea (LD), nivelul de conectare (C)
library(NetIndices)
graph.properties <- GenInd(adjacency)
graph.properties## $N
## [1] 208
##
## $T..
## [1] 652
##
## $TST
## [1] 652
##
## $Lint
## [1] 652
##
## $Ltot
## [1] 652
##
## $LD
## [1] 3.134615
##
## $C
## [1] 0.01514307
##
## $Tijbar
## [1] 1
##
## $TSTbar
## [1] 3.134615
##
## $Cbar
## [1] 0.01086163
sau:
library(network)
adj_network <- network(adjacency, directed = TRUE)
adj_network## Network attributes:
## vertices = 208
## directed = TRUE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## total edges= 652
## missing edges= 0
## non-missing edges= 652
##
## Vertex attribute names:
## vertex.names
##
## No edge attributes
equiv.clust calculeaza hierarhic nivelul de clusterizare folosind pozitiile din retea
ec <- equiv.clust(adj_network, mode = "graph", cluster.method = "average", plabels = network.vertex.names(adj_network))
ec## Position Clustering:
##
## Equivalence function: sedist
## Equivalence metric: hamming
## Cluster method: average
## Graph order: 208
ec$cluster$labels <- ec$plabels
plot(ec)